// GLSL-like spellings mapped onto the OpenCL C equivalents so shader-style
// code can be used unchanged: vector type names ...
#define vec2 float2
#define vec4 float4
// ... and colour-named component selectors (c.rgb expands to c.xyz, c.rgba to c.xyzw).
#define rgb xyz
#define rgba xyzw

// Parameter block passed (as a __global pointer) to the filter helpers and kernels.
// NOTE(review): presumably mirrors a host-side struct - field order and sizes
// must stay in sync with it; confirm against the host code.
typedef struct
{
	int width[8];		// widths; the visible code only reads width[0] (scales origROI[0] to pixels)
	int height[8];		// heights; the visible code only reads height[0] (scales origROI[1] to pixels)
	float cur_time;		// not read in the visible code; presumably current effect time - TODO confirm
	float total_time;	// not read in the visible code; presumably total effect duration - TODO confirm
	float origROI[4];	// source region: [0],[1] are used as x/y offset, [2],[3] as x/y scale of normalized coordinates
	float resultROI[4];	// not read in the visible code; presumably the output region - TODO confirm
	float angle;		// not read in the visible code
}FilterParam;

// File-scope sampler: normalized coordinates, clamp-to-edge addressing, nearest-neighbour filtering.
// Kernels that declare their own local `sampler` shadow this one.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

// X index of this work-item with the ROI offset (origROI[0] * width[0]) subtracted.
// The subtraction is done in float and the result is converted to int on return.
static int get_global_id0(__global FilterParam* param)
{
	const float roiOffsetX = param->origROI[0] * param->width[0];
	return get_global_id(0) - roiOffsetX;
}

// Y index of this work-item with the ROI offset (origROI[1] * height[0]) subtracted.
// The subtraction is done in float and the result is converted to int on return.
static int get_global_id1(__global FilterParam* param)
{
	const float roiOffsetY = param->origROI[1] * param->height[0];
	return get_global_id(1) - roiOffsetY;
}

// Samples src_data at coordinate tc after mapping it into the source ROI:
// tc is scaled by (origROI[2], origROI[3]) and shifted by (origROI[0], origROI[1]).
// Uses the file-scope sampler (normalized coordinates, clamp-to-edge, nearest).
vec4 INPUTSRC(image2d_t src_data, __global FilterParam* param, vec2 tc)
{
	const vec2 roiScale = (vec2)(param->origROI[2], param->origROI[3]);
	const vec2 roiOffset = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 mapped = tc * roiScale + roiOffset;

	return read_imagef(src_data, sampler, mapped);
}

// Samples ovelay1 at coordinate tc with the file-scope sampler, without any
// ROI remapping. `param` is accepted for signature parity with INPUTSRC but is not used.
vec4 INPUT(image2d_t ovelay1,  __global FilterParam* param, vec2 tc)
{
	const vec2 coord = tc;
	return read_imagef(ovelay1, sampler, coord);
}


	// Filter Blue Explosion
// Per-pixel table-driven blend of `merge` over `image`.
// Each 8-bit channel of the result is looked up in `table` at index
// (source channel << 8) + overlay channel, then mixed with the source channel
// by alpha/100. The source alpha channel is passed through unchanged.
__kernel void filter_BlueExplosion(__read_only image2d_t merge,   		// overlay image
								   __read_only image2d_t image,   		// source image
								   __global uchar* table,   			// lookup table, indexed by (source << 8) + overlay
								   __write_only image2d_t retImage,   	// result image
								   __private int alpha,					// blend factor, scope[0-100]
								   __global FilterParam* param)
{
	const int gx = get_global_id(0);
	const int gy = get_global_id(1);
	const int imgW = get_image_width(image);
	const int imgH = get_image_height(image);

	// Work-items that fall outside the image have nothing to write.
	if (gx >= imgW || gy >= imgH)
		return;

	// Normalized coordinate of the texel centre, measured from the ROI-adjusted index.
	const vec2 fragCoord = (vec2)(get_global_id0(param), get_global_id1(param));
	const vec2 uv = (fragCoord + (vec2)(0.5f)) / (float2)(imgW, imgH);

	const float4 overlayPx = INPUT(merge, param, uv);
	const float4 sourcePx = INPUTSRC(image, param, uv);

	// Quantise both pixels to 8 bits per channel (conversion truncates).
	const uchar ovR = overlayPx.x * 255;
	const uchar ovG = overlayPx.y * 255;
	const uchar ovB = overlayPx.z * 255;
	const uchar srcR = sourcePx.x * 255;
	const uchar srcG = sourcePx.y * 255;
	const uchar srcB = sourcePx.z * 255;

	// Table lookup: row selected by the source channel, column by the overlay channel.
	const uchar mapR = table[(srcR << 8) + ovR];
	const uchar mapG = table[(srcG << 8) + ovG];
	const uchar mapB = table[(srcB << 8) + ovB];

	// Mix the looked-up value with the untouched source channel.
	const float factor = (float)(alpha) / 100.0f;
	const float keep = 1.0f - factor;

	const uchar outR = (uchar)(mapR * factor + keep * srcR);
	const uchar outG = (uchar)(mapG * factor + keep * srcG);
	const uchar outB = (uchar)(mapB * factor + keep * srcB);

	write_imagef(retImage,
				 (int2)(gx, gy),
				 (float4)(outR / 255.0f, outG / 255.0f, outB / 255.0f, sourcePx.w));
}

// Filter Wrap
// Clamps `value` to the index range [0, large - 1].
// Negative values become 0; values >= large become large - 1.
int clamp0(int large, int value)
{
	if (value < 0)
		return 0;

	if (value >= large)
		return large - 1;

	return value;
}

// 3x3 weighted smoothing of `image` into `retImage`.
//
// Interior pixels are replaced by the weighted average of their 3x3
// neighbourhood (weights 1-2-1 / 2-4-2 / 1-2-1, sum 16), computed on 8-bit
// quantised channels (including alpha), and then mixed with the original
// pixel by alpha/100 (100 = fully smoothed, 0 = original pixel).
// Pixels on the outermost row/column are copied through unchanged.
//
// Changes versus the previous version:
//  - `alpha` was computed into `factor` but never applied; it now blends the
//    smoothed value with the source pixel, as the other filters in this file do.
//  - The border test used `> (width - 1)` / `> (height - 1)`, which can never be
//    true after the bounds guard, so the right/bottom edges were smoothed while
//    the left/top edges were copied. All four edges are now copied.
__kernel void filter_Wrap(__read_only image2d_t image,   	// input image
						  __write_only image2d_t retImage,  // output image
						  __private int alpha)	 			// blend factor, scope[0-100]
{
	int w = get_global_id(0);
	int h = get_global_id(1);
	int width = get_image_width(image);
	int height = get_image_height(image);

	if(w >= width || h >= height)
			return;

	// Integer pixel coordinates; this local sampler shadows the file-scope one.
	const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

	float4 color = read_imagef(image, sampler, (int2)(w, h));

	// Border pixels have no complete 3x3 neighbourhood: copy them through.
	if(w < 1 || w >= (width - 1) || h < 1 || h >= (height - 1))
	{
		write_imagef(retImage, (int2)(w, h), color);
		return;
	}

	// Source pixel quantised to 8 bits per channel, used for the final blend.
	uchar sr = color.x * 255;
	uchar sg = color.y * 255;
	uchar sb = color.z * 255;
	uchar sa = color.w * 255;

	// Accumulate the weighted 3x3 neighbourhood. Every tap is an interior
	// neighbour here, so no coordinate clamping is required.
	int sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
	for(int dy = -1; dy <= 1; dy++)
	{
		int rowWeight = (dy == 0) ? 2 : 1;
		for(int dx = -1; dx <= 1; dx++)
		{
			int weight = rowWeight * ((dx == 0) ? 2 : 1);

			// Quantise each tap to 8 bits before weighting, as the per-tap
			// uchar conversion did before.
			float4 tap = read_imagef(image, sampler, (int2)(w + dx, h + dy)) * 255.0f;
			uchar tap_r = tap.x, tap_g = tap.y, tap_b = tap.z, tap_a = tap.w;

			sum_r += weight * tap_r;
			sum_g += weight * tap_g;
			sum_b += weight * tap_b;
			sum_a += weight * tap_a;
		}
	}

	uchar blur_r = (uchar)(sum_r / 16.0f);
	uchar blur_g = (uchar)(sum_g / 16.0f);
	uchar blur_b = (uchar)(sum_b / 16.0f);
	uchar blur_a = (uchar)(sum_a / 16.0f);

	// Mix the smoothed value with the original pixel.
	float factor = (float)(alpha)/100.0f;

	uchar r = (uchar)(blur_r * factor + (1.0f - factor) * sr);
	uchar g = (uchar)(blur_g * factor + (1.0f - factor) * sg);
	uchar b = (uchar)(blur_b * factor + (1.0f - factor) * sb);
	uchar a = (uchar)(blur_a * factor + (1.0f - factor) * sa);

	write_imagef(retImage, (int2)(w, h), (float4)(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f));
}

// Filter Rise
// Clamps `value` to [min, max]. The arguments are expanded more than once,
// so do not pass expressions with side effects.
#define LIMIT_VALUE(value, min, max) ((value) < (min) ? (min) : ((value) < (max) ? (value) : (max)))
// Single-precision approximation of pi.
#define AmPI 3.1415926f

// Remaps a level `x` through a sine-shaped curve centred on 128.
//
// x        : input level; (x - 128) / 256 of pi is used as the base angle.
// iPercent : curve strength, clamped to [0, 100]; scales the angle by 1.0 .. 1.5.
// returns  : 128 + 128 * sin(angle), limited to [0, 255].
//
// The angle is clamped to [-pi/2, pi/2]. At +pi/2 the expression evaluates to
// 256, which does not fit in a uchar; the result is therefore clamped before
// the conversion so the brightest inputs map to 255 instead of an
// out-of-range conversion (which may wrap to 0).
uchar calTableValue(int x, int iPercent)
{
	float aPercent = 1.0f + 0.005f * LIMIT_VALUE(iPercent, 0, 100);
	float angle = aPercent * AmPI * (x - 128) / 256;
	angle = LIMIT_VALUE(angle, -AmPI / 2, AmPI / 2);

	// Evaluate once into a local: LIMIT_VALUE expands its argument several times.
	float level = 128 + 128 * sin(angle);
	level = LIMIT_VALUE(level, 0.0f, 255.0f);

	return (uchar)level;
}

// Tone filter driven by a remapped gray level.
// For each pixel: a gray level is formed from the per-channel tables
// (tableR/tableG/tableB, summed, rounded and shifted right by 10), remapped by
// calTableValue(gray, iPercent), and then used to build the output:
//   blue  <- gray
//   green <- table[gray + deta]
//   red   <- table[gray + deta + 6]
// each mixed with the source channel by alpha/100. Source alpha is passed through.
__kernel void filter_Rise(__read_only image2d_t image,   	// input image
						  __global uchar* table,   			// lookup table for the green/red outputs
						  __global int* tableR,   			// gray contribution of the red channel
						  __global int* tableG,   			// gray contribution of the green channel
						  __global int* tableB,   			// gray contribution of the blue channel
						  __write_only image2d_t retImage,  // output image
						  __private int iPercent,
						  __private int deta,
						  __private int alpha)	 			// blend factor, scope[0-100]
{
	const int px = get_global_id(0);
	const int py = get_global_id(1);
	const int imgW = get_image_width(image);
	const int imgH = get_image_height(image);

	// Work-items that fall outside the image have nothing to write.
	if (px >= imgW || py >= imgH)
		return;

	// Integer pixel coordinates, clamp-to-edge, nearest filtering.
	const sampler_t pixelSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

	const float4 src = read_imagef(image, pixelSampler, (int2)(px, py));

	// Source pixel quantised to 8 bits per channel (conversion truncates).
	const uchar srcR = src.x * 255;
	const uchar srcG = src.y * 255;
	const uchar srcB = src.z * 255;

	// Table-weighted gray: +512 rounds before the >> 10 scale-down.
	uchar gray = (tableR[srcR] + tableG[srcG] + tableB[srcB] + 512) >> 10;
	gray = calTableValue(gray, iPercent);

	// Mix each output channel with its source channel.
	const float factor = (float)(alpha) / 100.0f;
	const float keep = 1.0f - factor;

	const uchar outB = (uchar)(gray * factor + keep * srcB);
	const uchar outG = (uchar)(table[gray + deta] * factor + keep * srcG);
	const uchar outR = (uchar)(table[gray + deta + 6] * factor + keep * srcR);

	write_imagef(retImage,
				 (int2)(px, py),
				 (float4)(outR / 255.0f, outG / 255.0f, outB / 255.0f, src.w));
}